## -----------------------------------------------------------------------------
##  Title:   Script to reproduce analysis reported in
##   Informing the Public: How Party Communication Builds Opportunity Structures 
##  Date:    August 2019
##  Journal: Political Communication
##
## -----------------------------------------------------------------------------
## Preliminary notes:
## -----------------------------------------------------------------------------
## 1) Assumed directory structure is (automatic table/figure saves
##                                    are disabled):
##
##  data/
##    ..ees-spitzen.csv
##    ..parties-spitzen.csv
##  paper-figures/
##  paper-tables/
##  R/
##    ..ees-spitzen.R
##

## 2) readme.txt contains brief variable descriptions (see paper SI)
## 3) for the ggplot theme_ipsum was used for the paper with custom fonts
##    those are changed to theme_minimal() here
##
## -----------------------------------------------------------------------------
## 0. Packages and data loading/preparation
## -----------------------------------------------------------------------------

# for RStudio warnings
# !diagnostics off

# rm(list = ls())

library("dplyr")
library("tidyr")
library("broom")

library("ggplot2")
library("gridExtra")
library("ggrepel")
library("texreg")

library("lme4")
library("arm")
library("merTools")
library("interplot")

sessionInfo()
# centering
two_sd <- function(x){
  (x - mean(x, na.rm = TRUE) + 0.000001) / 
    (2*sd(x, na.rm = TRUE) + 0.000001)
}

# merged individual + macro
ees <- read.csv("../data/ees-spitzen.csv",
	stringsAsFactors = FALSE,
	header = TRUE)

# party level dataset (helper in the future)
party <- read.csv("../data/parties-spitzen.csv",
                header = TRUE,
                stringsAsFactors = FALSE)

# (in text)
table(party$avail_manifesto)
table(party$avail_twitter)
table(party$avail_press)

# subset (see text as well): no party is present across both platforms in our data
ees <- filter(ees, countrycode != "Malta") # for Twitter would leave Malta-0 only

# Appendix: descriptive table 
desc_table_party <- ees %>% group_by(countrycode, party_id) %>%
  filter(party_id_code != 0) %>%
  summarize(n = n(),
            pname_twitter    = unique(pname_twitter),
            pname_manifesto  = unique(pname_manifesto),
            pname_press      = unique(pname_press),
            twitter_cands    = mean(cands, na.rm = TRUE),
            twitter_tw       = mean(tweets, na.rm = TRUE),
            twitter_cands_sp = mean(cands_mention, na.rm = TRUE),
            twitter_reach    = mean(reach_spitzen, na.rm = TRUE),
            man_sal          = mean(spitzen_man, na.rm = TRUE),
            n_press          = mean(n_releases, na.rm = TRUE),
            press_sal        = mean(spitzen_mention, na.rm = TRUE))
desc_table_party$name <- desc_table_party$pname_twitter
desc_table_party$name[is.na(desc_table_party$pname_twitter)] <- desc_table_party$pname_manifesto[is.na(desc_table_party$pname_twitter)]
desc_table_party$name[desc_table_party$party_id == 1250233] <- "NPA"
desc_table_party <- filter(desc_table_party, !is.na(name))
desc_table_party <- dplyr::select(desc_table_party, countrycode, n, name, twitter_cands:press_sal)
desc_table_party$man_sal <- round(desc_table_party$man_sal, 3)

# for export (needs some formatting in document)
# write.table(file = "../paper-tables/appendix-party_descriptives.txt",
#           desc_table_party, sep = "\t",
#           row.names = FALSE)

## -----------------------------------------------------------------------------
## 1. Modeling (reported in main text)
## Some appendix tables generated in this section, after fitting models
## -----------------------------------------------------------------------------

# bivariate fits: Twitter
m00_baseline_twitter <- glmer(rec_count/pot_correct ~ 1 + (1 | party_id), 
                              data = filter(ees, avail_twitter == 1 & !is.na(spitzen_tw_logratio_sd)),
                              weights = pot_correct, family = "binomial")
m00_twitter <- glmer(rec_count/pot_correct ~ 1 + spitzen_tw_logratio_sd + (1 | party_id), 
                     data = filter(ees, avail_twitter == 1 & !is.na(spitzen_tw_logratio_sd)),
                     weights = pot_correct, family = "binomial")
anova(m00_baseline_twitter, m00_twitter)

# bivariate fits: Manifesto
m00_baseline_manifesto <- glmer(rec_count/pot_correct ~ 1 + (1 | party_id), 
                              data = filter(ees, avail_manifesto == 1),
                              weights = pot_correct, family = "binomial")
m00_manifesto <- glmer(rec_count/pot_correct ~ 1 + spitzen_man_01 + (1 | party_id), 
                     data = filter(ees, avail_manifesto == 1),
                     weights = pot_correct, family = "binomial")
anova(m00_baseline_manifesto, m00_manifesto)

# bivariate fits: Press 
m00_baseline_press <- glmer(rec_count/pot_correct ~ 1 + (1 | party_id), 
                            data = filter(ees, avail_press == 1),
                            weights = pot_correct, family = "binomial")
m00_press <- glmer(rec_count/pot_correct ~ 1 + spitzen_press_rel + (1 | party_id), 
                   data = filter(ees, avail_press == 1),
                   weights = pot_correct, family = "binomial")
anova(m00_baseline_press, m00_press)

# Appendix (and in text footnotes)
# JW = Jaro-Winkler distance measure for the press release data (rather than exact search)
m00_press_jw <- glmer(rec_count/pot_correct ~ 1 + spitzen_press_rel_jw + (1 | party_id), 
                      data = filter(ees, avail_press == 1),
                      weights = pot_correct, family = "binomial")
anova(m00_baseline_press, m00_press_jw)

# Dichotomized press release measure (any mention, based on exact search)
m00_press_dich <- glmer(rec_count/pot_correct ~ 1 + spitzen_press_01 + (1 | party_id), 
                      data = filter(ees, avail_press == 1),
                      weights = pot_correct, family = "binomial")
anova(m00_baseline_press, m00_press_dich)
# Dichotomized press release measure (any mention, based on JW distance search)
m00_press_dich_jw <- glmer(rec_count/pot_correct ~ 1 + spitzen_press_jw_01 + (1 | party_id), 
                        data = filter(ees, avail_press == 1),
                        weights = pot_correct, family = "binomial")
anova(m00_baseline_press, m00_press_dich_jw) 

# Appendix: main table (for bivariate)
screenreg(l = list(m00_twitter, m00_manifesto, m00_press),
          custom.model.names = c("Twitter model", "Manifesto model", "Press model"),
          custom.coef.names = c("Intercept",
                                "Twitter reach", "Manifesto mention (=1)", "Press release salience"),
          caption = "Bivariate models (logit coefficients)",
          include.loglik = FALSE,
          custom.gof.names = c("AIC", "BIC",
            "N",
            "Parties",
            "(Var) Parties"))
# Appendix: alternative table (for bivariate)
screenreg(l = list(m00_twitter, m00_manifesto, m00_press_jw),
          custom.model.names = c("Twitter model", "Manifesto model", "Press model (JW)"),
          custom.coef.names = c("Intercept",
                                "Twitter reach", "Manifesto mention (=1)", "Press release salience"),
          caption = "Bivariate models (logit coefficients). JW = Jaro-Winkler text search.",
          include.loglik = FALSE,
          custom.gof.names = c("AIC", "BIC",
                               "N",
                               "Parties",
                               "(Var) Parties"))

# Export to file

# htmlreg(l = list(m00_twitter, m00_manifesto, m00_press),
#           custom.model.names = c("Twitter model", "Manifesto model", "Press model"),
#           custom.coef.names = c("Intercept",
#                                 "Twitter reach", "Manifesto mention (=1)", "Press release salience"),
#           caption = "Bivariate models (logit coefficients)",
#           include.loglik = FALSE,
#           custom.gof.names = c("AIC", "BIC",
#             "N",
#             "Parties",
#             "(Var) Parties"),
#           digits = 2,
#           stars = numeric(0),
#           bold = 0.05,
#           center = TRUE,
#           single.row = TRUE,
#         file = "../paper-tables/appendix_bivariate_models.doc")
# htmlreg(l = list(m00_twitter, m00_manifesto, m00_press_jw),
#           custom.model.names = c("Twitter model", "Manifesto model", "Press model (JW)"),
#           custom.coef.names = c("Intercept",
#                                 "Twitter reach", "Manifesto mention (=1)", "Press release salience"),
#           caption = "Bivariate models (logit coefficients). JW = Jaro-Winkler text search.",
#           include.loglik = FALSE,
#           custom.gof.names = c("AIC", "BIC",
#             "N",
#             "Parties",
#             "(Var) Parties"),
#           digits = 2,
#           stars = numeric(0),
#           bold = 0.05,
#           center = TRUE,
#           single.row = TRUE,
#         file = "../paper-tables/appendix_bivariate_models-jw.doc")

# Figure 1: bivariate summary plot
# tidy() will also work on the model output

# given these are bivariate models, hard-coded [2] for the one predictor of interest 

bivariate_res <- data.frame(
  mu = c(as.numeric(fixef(m00_twitter)[2]), # average coefficient
       as.numeric(fixef(m00_manifesto)[2]),
       as.numeric(fixef(m00_press)[2])),
  se = c(as.numeric(se.fixef(m00_twitter)[2]), # standard error of coef
       as.numeric(se.fixef(m00_manifesto)[2]),
       as.numeric(se.fixef(m00_press)[2])),
  k  = c(dim(coef(m00_twitter)$party_id)[1], # parties, including no-id groups
      dim(coef(m00_manifesto)$party_id)[1],
      dim(coef(m00_press)$party_id)[1]),
  n  = c(length(m00_twitter@resp$mu), # respondents
       length(m00_manifesto@resp$mu),
       length(m00_press@resp$mu)),
  coef = c("Twitter", "Manifesto", "Press\nreleases")
)
bivariate_res$lab <- paste0("N = ", 
                            bivariate_res$n, "\nJ = ", 
                            bivariate_res$k)

# bivariate plot (main results introduction)
fig1 <- ggplot(bivariate_res, aes(x = coef, y = mu, 
                          ymin = mu - 1.96*se, ymax = mu + 1.96*se,
                          label = lab, shape = coef)) +
  geom_linerange(colour = "grey30", size = 1) +
  geom_point(colour = "black", size = 4) +
  scale_y_continuous("", breaks = c(0, 0.5, 1), labels = c(0, 0.5, 1)) +
  geom_text(size = 3, colour = "grey20", vjust = 1.5) +
  geom_hline(yintercept = 0, colour = "grey50", linetype = 2) +
  theme_minimal() + scale_shape_discrete(guide = FALSE) +
  coord_flip() +
  labs(
    x = "",
    y = "Effect size",
    title    = "Spitzenkandidaten in party communication",
    caption  = "Logit coefficients with 95% confidence intervals. Party sample size includes non-identifier groups as well.",
    subtitle  = "Bivariate models of individual level recognition")

# export to file (pdf) or (png)
# note: final paper uses different theme and fonts

# ggsave(fig1, file = "../paper-figures/Figure-1-bivariate.pdf",
#        width = 8, height = 6)
# ggsave(fig1, file = "../paper-figures/Figure-1-bivariate.png",
#        width = 8, height = 6, dpi = 300)

# multivariate models (Table 3, Column 1 - for variable names, Column 0)
m10_baseline <- glmer(rec_count/pot_correct ~ 1 +
                      female + age_pc +
                      secondary + tertiary +
                      euk_pc + int_pc +
                      exposure + dis_pc +
                      eu_pc + new_pc +
                      p_spitz + countrycode +                       
                      (1 | party_id), 
                      data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                      control = glmerControl(optimizer = "bobyqa",
                                             optCtrl = list(maxfun = 5e6)),
                      weights = pot_correct, family = "binomial")

# multivariate models (Table 3, Column 2)
m10_twitter <- glmer(rec_count/pot_correct ~ 1 +
                      female + age_pc +
                      secondary + tertiary +
                      euk_pc + int_pc +
                      exposure + dis_pc +
                      eu_pc + new_pc +
                      p_spitz + countrycode +
                      spitzen_tw_logratio_sd +
                      (1 | party_id), 
                      data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                      control = glmerControl(optimizer = "bobyqa",
                                            optCtrl = list(maxfun = 5e6)),
                      weights = pot_correct, family = "binomial")
anova(m10_baseline, m10_twitter)

# multivariate models (Table 3, Column 3)
m10_manifesto <- glmer(rec_count/pot_correct ~ 1 +
                       female + age_pc +
                       secondary + tertiary +
                       euk_pc + int_pc +
                       exposure + dis_pc +
                       eu_pc + new_pc +
                       p_spitz + countrycode +
                       spitzen_man_01 +
                       (1 | party_id), 
                     data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                     control = glmerControl(optimizer = "bobyqa",
                                            optCtrl = list(maxfun = 5e6)),
                     weights = pot_correct, family = "binomial")
anova(m10_baseline, m10_manifesto)

# multivariate models (Table 3, Column 4)
m10_both <- glmer(rec_count/pot_correct ~ 1 +
                         female + age_pc +
                         secondary + tertiary +
                         euk_pc + int_pc +
                         exposure + dis_pc +
                         eu_pc + new_pc +
                         p_spitz + countrycode +
                         spitzen_man_01 + spitzen_tw_logratio_sd  +
                         (1 | party_id), 
                       data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                       control = glmerControl(optimizer = "bobyqa",
                                         optCtrl = list(maxfun = 5e6)),
                       weights = pot_correct, family = "binomial")
anova(m10_manifesto, m10_both)
anova(m10_twitter, m10_both)

# Press release multivariate models
# twitter needs re-centering, different sample now, fewer countries
# (using party level dataset as helper)

party_sub <- party %>% filter(avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1) %>% 
  mutate(spitzen_twitter_sub = two_sd(spitzen_tw_logratio)) %>% 
  dplyr::select(ees, spitzen_twitter_sub) 

ees <- left_join(ees, party_sub, by = c("party_id_code" = "ees"))
ees$spitzen_twitter_sub[ees$party_id_code == 0] <- min(ees$spitzen_twitter_sub, na.rm = TRUE)

ees_sub <- ees %>% 
  filter(avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1) %>% 
  mutate(spitzen_tw_logratio_sd = spitzen_twitter_sub)

# (Table 3, Column 5)
m20_baseline <- glmer(rec_count/pot_correct ~ 1 +
                        female + age_pc +
                        secondary + tertiary +
                        euk_pc + int_pc +
                        exposure + dis_pc +
                        eu_pc + new_pc +
                        p_spitz + countrycode +                       
                        (1 | party_id), 
                      data = ees_sub,
                      control = glmerControl(optimizer = "bobyqa",
                                             optCtrl = list(maxfun = 5e6)),
                      weights = pot_correct, family = "binomial")

# (Table 3, Column 6)
m20_press_only <- glmer(rec_count/pot_correct ~ 1 +
                        female + age_pc +
                        secondary + tertiary +
                        euk_pc + int_pc +
                        exposure + dis_pc +
                        eu_pc + new_pc +
                        p_spitz + countrycode +  
                        spitzen_press_rel +
                        (1 | party_id), 
                      data = ees_sub,
                      control = glmerControl(optimizer = "bobyqa",
                                             optCtrl = list(maxfun = 5e6)),
                      weights = pot_correct, family = "binomial")
anova(m20_baseline, m20_press_only)

# # (Table 3, Column 7)
m20_press_all <- glmer(rec_count/pot_correct ~ 1 +
                          female + age_pc +
                          secondary + tertiary +
                          euk_pc + int_pc +
                          exposure + dis_pc +
                          eu_pc + new_pc +
                          p_spitz + 
                          countrycode +       
                          spitzen_man_01 + 
                          spitzen_tw_logratio_sd + spitzen_press_rel +
                          (1 | party_id), 
                        data = ees_sub,
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl = list(maxfun = 5e6)),
                        weights = pot_correct, family = "binomial")
anova(m20_baseline, m20_press_all)
anova(m20_press_only, m20_press_all)

# Generate Figure 2 (main text) and export Table 3 (needs customization in text)
# Figure 2 plot for effects
pred_dat_twitter <- expand.grid(
  spitzen_man_01 = 0,
  spitzen_tw_logratio_sd     = seq(-0.75, 0.75, 0.1),
  female = 0,
  age_pc = 0,
  secondary = 0,
  tertiary = 0,
  euk_pc = 0,
  int_pc = 0,
  exposure = 0,
  dis_pc = 0,
  eu_pc = 0,
  new_pc = 0,
  p_spitz = 0,
  countrycode = "Austria",
  party_id = 0
)
pred_dat_manifesto <- expand.grid(
  spitzen_man_01 = c(0, 1),
  spitzen_tw_logratio_sd = 0,
  female = 0,
  age_pc = 0,
  secondary = 0,
  tertiary = 0,
  euk_pc = 0,
  int_pc = 0,
  exposure = 0,
  dis_pc = 0,
  eu_pc = 0,
  new_pc = 0,
  p_spitz = 0,
  countrycode = "Austria",
  party_id = 0
)
pred_dat_manifesto_single <- expand.grid(
  spitzen_man_01 = c(0, 1),
  female = 0,
  age_pc = 0,
  secondary = 0,
  tertiary = 0,
  euk_pc = 0,
  int_pc = 0,
  exposure = 0,
  dis_pc = 0,
  eu_pc = 0,
  new_pc = 0,
  p_spitz = 0,
  countrycode = "Austria",
  party_id = 0
)

pred_dat_press_single <- expand.grid(
  spitzen_press_rel = seq(-0.4, 2.4, 0.1),
  female = 0,
  age_pc = 0,
  secondary = 0,
  tertiary = 0,
  euk_pc = 0,
  int_pc = 0,
  exposure = 0,
  dis_pc = 0,
  eu_pc = 0,
  new_pc = 0,
  p_spitz = 0,
  countrycode = "Austria",
  party_id = 0
)

x <- predictInterval(m10_both, newdata = pred_dat_twitter, which = "fixed",
                     include.resid.var = FALSE, level = 0.95,
                     n.sims = 10000, type = "probability")

pred_dat_twitter <- cbind(pred_dat_twitter, x)
pred_dat_twitter <- dplyr::select(pred_dat_twitter, spitzen_tw_logratio_sd, fit, upr, lwr)

x <- predictInterval(m10_both, newdata = pred_dat_manifesto, which = "fixed",
                     include.resid.var = FALSE, level = 0.95,
                     n.sims = 10000, type = "probability")
pred_dat_manifesto <- cbind(pred_dat_manifesto, x)
pred_dat_manifesto <- dplyr::select(pred_dat_manifesto, spitzen_man_01, fit, upr, lwr)
pred_dat_manifesto$what <- "with Twitter"

x <- predictInterval(m10_manifesto, newdata = pred_dat_manifesto_single, which = "fixed",
                     include.resid.var = FALSE, level = 0.95,
                     n.sims = 10000, type = "probability")
pred_dat_manifesto_single <- cbind(pred_dat_manifesto_single, x)
pred_dat_manifesto_single <- dplyr::select(pred_dat_manifesto_single, spitzen_man_01, fit, upr, lwr)
pred_dat_manifesto_single$what <- "without Twitter"

pred_dat_manifesto <- rbind(pred_dat_manifesto, pred_dat_manifesto_single)

x <- predictInterval(m20_press_only, newdata = pred_dat_press_single, which = "fixed",
                     include.resid.var = FALSE, level = 0.95,
                     n.sims = 10000, type = "probability")
pred_dat_press_single <- cbind(pred_dat_press_single, x)
pred_dat_press_single <- dplyr::select(pred_dat_press_single, spitzen_press_rel, fit, upr, lwr)


p_twitter <- ggplot(pred_dat_twitter, 
                    aes(x = spitzen_tw_logratio_sd, y = fit,
                        ymin = lwr, ymax = upr)) +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.5) +
  geom_line(size = 1.2) + geom_ribbon(fill = "grey70", alpha = 0.25) +
  scale_y_continuous(breaks = c(0, 0.25, 0.65), labels = c(0, 0.25, 0.65),
                     limits = c(0, 0.65)) +
  scale_x_continuous(breaks = c(-0.7527, 0.69),
                     labels = c("No\nTwitter reach", "Maximum\nTwitter reach")) +
  theme_minimal() +
  labs(
    x = "",
    y = "Recognition proportion",
    title    = "a. Twitter reach",
    caption  = "",
    subtitle  = "")

p_man <- ggplot(pred_dat_manifesto, 
                aes(x = factor(spitzen_man_01), y = fit,
                    ymin = lwr, ymax = upr, 
                    shape = what, colour = what)) +
  geom_linerange(position = position_dodge(width = 0.25)) + 
  geom_point(position = position_dodge(width = 0.25), size = 2)  +
  scale_y_continuous(breaks = c(0, 0.25, 0.65), labels = c(0, 0.25, 0.65),
                     limits = c(0, 0.65)) +
  scale_color_manual("", guide = FALSE, values = c("black", "grey50")) +
  scale_shape_discrete("") +
  scale_x_discrete(breaks = c(0, 1),
                   labels = c("No mention", "Some mention")) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "b. Euromanifesto mentions",
    caption  = "",
    subtitle  = "") +
  theme(legend.position = c(0.125, 0.9))

p_press <- ggplot(pred_dat_press_single, 
                  aes(x = spitzen_press_rel, y = fit,
                      ymin = lwr, ymax = upr)) +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.5) +
  geom_line(size = 1.2) + geom_ribbon(fill = "grey70", alpha = 0.25) +
  scale_y_continuous(breaks = c(0, 0.25, 0.65), labels = c(0, 0.25, 0.65),
                     limits = c(0, 0.65)) +
  scale_x_continuous(breaks = c(-0.39, 2.4),
                     labels = c("No mentions", 
                                "Maximum\nmentions")) +
  expand_limits(x = c(1, 2.75)) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "c. Press release mentions",
    subtitle  = "")

# Figure 2
fig2 <- grid.arrange(p_twitter, p_man, p_press, ncol = 3,
             bottom="Note: Example predicted values calculated for average-aged Austrian man with lower than secondary education, no exposure to the campaign, 
    with average discussion frequency, news consumption and EU position. He is not a supporter of a party being behind a lead candidate. 
             For Twitter reach, party manifesto salience is fixed at 0. For manifesto salience with Twitter control (model: Both), Twitter reach fixed at sample mean. 
             Party-specific random effects collapsed (discarded), as hypothetical example. Shaded area and line ranges for 95% confidence intervals.")

# export to file

# ggsave(fig2, file = "../paper-figures/Figure-2-slopes.pdf",
#        width = 12, height = 6)
# ggsave(fig2, file = "../paper-figures/Figure-2-slopes.png",
#        width = 12, height = 6)

# export to file main table (Table 2)
# htmlreg(
#   file = "../paper-tables/main-table-2.doc",
#   c(m10_baseline, m10_twitter, m10_manifesto, m10_both, m20_baseline, m20_press_only, m20_press_all),
#   custom.model.names = c(
#     "Baseline (all)",
#     "Twitter reach (all)",
#     "Manifesto salience (all)",
#     "Both (all)",
#     "Baseline (subsample)",
#     "Press release salience (subsample)",
#     "All three (subsample)"),
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   caption = "Notes: (Restricted) Maximum Likelihood estimates of logit coefficients with standard errors in parentheses.",
#   omit.coef = "countrycode",
#   center = TRUE,
#   include.loglik = FALSE,
#   single.row = TRUE,
#   custom.gof.names = c(
#     "AIC",
#     "BIC",
#     "N",
#     "Parties",
#     "(Var) Parties"),
#   custom.coef.names = c(
#     "Intercept",
#     "Sex (Female = 1)",
#     "Age",
#     "Secondary education",
#     "Tertiary education",
#     "EU knowledge",
#     "Interest in politics",
#     "Campaign exposure",
#     "Political discussion",
#     "EU position",
#     "News consumption",
#     "Party of lead candidate",
#     "Twitter reach",
#     "Manifesto salience",
#     "Press release salience")
# )

# Correlation between three different platforms (reported in text)
# for those where manifesto and twitter are available (and excluded non-id)
for_cor <- ees %>%
  filter(avail_twitter == 1 & avail_manifesto == 1 & party_id_code != 0) %>%
  group_by(party_id) %>%
  summarize(spitzen_tw_logratio = unique(spitzen_tw_logratio),
            spitzen_man_01 = unique(spitzen_man_01))
cor(for_cor$spitzen_tw_logratio, for_cor$spitzen_man_01, method = "spearman")

# for those where all three platforms are available (and excluded non-id)
for_cor <- ees %>%
  filter(avail_twitter == 1 & avail_manifesto == 1 & party_id_code != 0 & avail_press == 1) %>%
  group_by(party_id) %>%
  summarize(spitzen_tw_logratio = unique(spitzen_tw_logratio),
            spitzen_man_01 = unique(spitzen_man_01),
            spitzen_press  = unique(spitzen_press_rel))

cor(for_cor[,2:4], method = "spearman")

## -----------------------------------------------------------------------------
## 2. Models in Sensitivity checks: detailed look at the opportunity structure
## Table results in SI with main text plot at the end (some tables formatted in text)
## -----------------------------------------------------------------------------

# Empty model: get differences in coef (0 as no id, 0 as no focus, rest)

m00_empty <- glmer(rec_count/pot_correct ~ 1 +
                     (1 | party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")

check1 <- arm::invlogit(coef(m00_empty)$party_id)
check1$party_id <- rownames(check1)
party_merge <- dplyr::select(party, ees, spitzen_man_01, spitzen_tw_logratio)
party_merge$ees <- as.character(party_merge$ees)
check1 <- left_join(check1, party_merge, by = c("party_id" = "ees"))
check1$twitter <- ifelse(check1$spitzen_tw_logratio == 0, 0, 1)
names(check1)[1] <- "recog"

# Ruling out partisan effect (in text)
check1 %>% filter(is.na(twitter)) %>% summarize(mean(recog)) # non-id mean

# party-id w 0 on twitter
check1 %>% filter(!is.na(twitter) & spitzen_tw_logratio == 0) %>% summarize(m = mean(recog),
                                                                            n = n())
# party-id w. 0 on manifesto
check1 %>% filter(!is.na(twitter) & spitzen_man_01 == 0) %>% summarize(m = mean(recog),
                                                                            n = n())
# party-id with larger than 0 twitter
check1 %>% filter(!is.na(twitter) & spitzen_tw_logratio != 0) %>% summarize(m = mean(recog),
                                                                            n = n())
# party-id with larger than 0 manifesto
check1 %>% filter(!is.na(twitter) & spitzen_man_01 != 0) %>% summarize(m = mean(recog),
                                                                       n = n())

# model without no-identifiers
m10_test1 <- glmer(rec_count/pot_correct ~ 1 +
                     female + age_pc +
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 + spitzen_tw_logratio_sd +
                     (1 | party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1 & party_id_code != 0),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")
summary(m10_test1)

# start building future results data frame for all robustness checks
robust_figure <- data.frame(what     = rep("1. Partisan only", 2),
                            platform = c("Twitter", "Euromanifesto"),
                            coef = c(fixef(m10_test1)["spitzen_tw_logratio_sd"],
                                     fixef(m10_test1)["spitzen_man_01"]),
                            se   = c(se.fixef(m10_test1)["spitzen_tw_logratio_sd"],
                                     se.fixef(m10_test1)["spitzen_man_01"]))

# model with eu knowledge as outcome
ees$euknow <- ees$euknow*2 # from previously recoded back to 0, 1, 2 correct answers

m10_test2 <- glmer(euknow/eu_correct ~ 1 +
                     female + age_pc +
                     secondary + tertiary +
                     int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 + spitzen_tw_logratio_sd +
                     (1 | party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = eu_correct, family = "binomial")
summary(m10_test2)

# add to robustness checks data frame
robust_figure <- rbind(robust_figure, data.frame(what     = rep("2. EU knowledge", 2),
                                                 platform = c("Twitter", "Euromanifesto"),
                                                 coef = c(fixef(m10_test2)["spitzen_tw_logratio_sd"],
                                                          fixef(m10_test2)["spitzen_man_01"]),
                                                 se   = c(se.fixef(m10_test2)["spitzen_tw_logratio_sd"],
                                                          se.fixef(m10_test2)["spitzen_man_01"])))

# run two alternative Twitter reach measures
# proportion of candidates
m10_test3 <- glmer(rec_count/pot_correct ~ 1 +
                     female + age_pc +
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 + spitzen_tw_cands +
                     (1 | party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")
summary(m10_test3)
robust_figure <- rbind(robust_figure, data.frame(what     = rep("3. Proportion of candidates", 2),
                                                 platform = c("Twitter", "Euromanifesto"),
                                                 coef = c(fixef(m10_test3)["spitzen_tw_cands"],
                                                          fixef(m10_test3)["spitzen_man_01"]),
                                                 se   = c(se.fixef(m10_test3)["spitzen_tw_cands"],
                                                          se.fixef(m10_test3)["spitzen_man_01"])))

# proportion of tweets
m10_test4 <- glmer(rec_count/pot_correct ~ 1 +
                     female + age_pc +
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 + spitzen_tw_tweets +
                     (1 | party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")
summary(m10_test4)
robust_figure <- rbind(robust_figure, data.frame(what     = rep("4. Proportion of tweets", 2),
                                                 platform = c("Twitter", "Euromanifesto"),
                                                 coef = c(fixef(m10_test4)["spitzen_tw_tweets"],
                                                          fixef(m10_test4)["spitzen_man_01"]),
                                                 se   = c(se.fixef(m10_test4)["spitzen_tw_tweets"],
                                                          se.fixef(m10_test4)["spitzen_man_01"])))

# run a three level specification 
m10_test5 <- glmer(rec_count/pot_correct ~ 1 +
                     female + age_pc +
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + 
                     spitzen_man_01 + spitzen_tw_logratio_sd +
                     (1 | countrycode/party_id), 
                   data = filter(ees, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")
summary(m10_test5)
robust_figure <- rbind(robust_figure, data.frame(what     = rep("5. Three-level specification", 2),
                                                 platform = c("Twitter", "Euromanifesto"),
                                                 coef = c(fixef(m10_test5)["spitzen_tw_logratio_sd"],
                                                          fixef(m10_test5)["spitzen_man_01"]),
                                                 se   = c(se.fixef(m10_test5)["spitzen_tw_logratio_sd"],
                                                          se.fixef(m10_test5)["spitzen_man_01"])))

# plot figure figure based on the robustness checks
robust_figure$what <- as.character(robust_figure$what)
fig3 <- ggplot(robust_figure, 
       aes(x = factor(what), y = coef,
           ymin = coef - 1.96*se, ymax = coef + 1.96*se)) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.25) +
  geom_linerange() + 
  geom_point(size = 2)  +
  facet_wrap(~platform) +
  coord_flip() + scale_x_discrete(limits = rev(levels(factor(robust_figure$what)))) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "Spitzenkandidaten in party communication",
    caption  = "Logit coefficients with 95% confidence intervals.",
    subtitle  = "Robustness check and placebo test summary")

# export to file

# ggsave(fig3, file = "../paper-figures/Figure-3-robust.pdf",
#        width = 8, height = 6)
# ggsave(fig3, file = "../paper-figures/Figure-3-robust.png",
#        width = 8, height = 6, dpi = 300)


# Age related heterogeneity
# age group, with two thresholds (1 = young 35 and younger, 1 = 25 and younger)

ees$age_dich_1 <- ifelse(ees$age*99 < 36, 1, 0) 
ees$age_dich_2 <- ifelse(ees$age*99 < 26, 1, 0)

## find and make sure we have within each party-id (or non-id) group
## age variation (for the dichotomous variables)
ees <- ees %>% group_by(party_id) %>% 
  mutate(is_dich1_variant = length(unique(age_dich_1)),
         is_dich2_variant = length(unique(age_dich_2)))

ees_sub_age_1 <- filter(ees, is_dich1_variant > 1)
ees_sub_age_2 <- filter(ees, is_dich2_variant > 1)

m10_test6 <- glmer(rec_count/pot_correct ~ 1 +
                     age_dich_1 +
                     female + 
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 +
                     spitzen_tw_logratio_sd*age_dich_1 +
                     (1 + age_dich_1 | party_id), 
                   data = filter(ees_sub_age_1, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")
m10_test7 <- glmer(rec_count/pot_correct ~ 1 +
                     age_dich_2 +
                     female + 
                     secondary + tertiary +
                     euk_pc + int_pc +
                     exposure + dis_pc +
                     eu_pc + new_pc +
                     p_spitz + countrycode +
                     spitzen_man_01 +
                     spitzen_tw_logratio_sd*age_dich_2 +
                     (1 + age_dich_2 | party_id), 
                   data = filter(ees_sub_age_2, avail_twitter == 1 & avail_manifesto == 1),
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 5e6)),
                   weights = pot_correct, family = "binomial")

p1 <- interplot(m10_test6, "spitzen_tw_logratio_sd", "age_dich_1", plot = FALSE)
p2 <- interplot(m10_test7, "spitzen_tw_logratio_sd", "age_dich_2", plot = FALSE)

age_plot_1 <- ggplot(p1, aes(x = factor(age_dich_1), y = coef,
                             ymin = ub, ymax = lb, group = 1)) +
  geom_linerange() + 
  geom_point(size = 2)  +
  geom_line(alpha = 0.25, linetype = 2) +
  theme_minimal() +
  scale_x_discrete(labels = c("Above 35", "35 and below")) +
  labs(
    x = "",
    y = "Effect of (2 SD) Twitter reach change",
    title    = "Conditional effect of Twitter reach on age",
    caption  = "",
    subtitle  = "35 year age split")

age_plot_2 <- ggplot(p2, aes(x = factor(age_dich_2), y = coef,
                             ymin = ub, ymax = lb, group = 1)) +
  geom_linerange() + 
  geom_point(size = 2)  +
  geom_line(alpha = 0.25, linetype = 2) +
  theme_minimal() +
  scale_x_discrete(labels = c("Above 25", "25 and below")) +
  labs(
    x = "",
    y = "Effect of (2 SD) Twitter reach change",
    title    = "",
    caption  = "",
    subtitle  = "25 year age split")

fig4 <- grid.arrange(age_plot_1, age_plot_2, ncol = 2)

# ggsave(fig4, file = "../paper-figures/Figure-4-age.pdf",
#        width = 8, height = 6)
# ggsave(fig4, file = "../paper-figures/Figure-4-age.png",
#        width = 8, height = 6, dpi = 300)

# export model results for appendix
# htmlreg(
#   file = "../paper-tables/appendix-robust_models.doc",
#   c(m10_test1, m10_test2, m10_test3, m10_test4, m10_test5),
#   custom.model.names = c("Partisans only", "EU knowledge", "Proportion candidates", "Proportion Tweets", "3-level model"),
#   caption = "",
#   omit.coef = "countrycode",
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   single.row = TRUE,
#   include.loglik = FALSE,
#   custom.gof.names = c(
#     "AIC",
#     "BIC",
#     "N",
#     "Parties",
#     "(Var) Parties",
#     "Parties",
#     "Countries",
#     "(Var) Parties in country",
#     "(Var) Country"),
#   custom.coef.names = c(
#     "Intercept",
#     "Sex (Female = 1)",
#     "Age",
#     "Secondary education",
#     "Tertiary education",
#     "EU knowledge",
#     "Interest in politics",
#     "Campaign exposure",
#     "Political discussion",
#     "EU position",
#     "News consumption",
#     "Party of lead candidate",
#     "Manifesto salience",
#     "Twitter reach",
#     "Twitter: proportion of cands.",
#     "Twitter: proportion of tweets")
# )
# htmlreg(
#   file = "../paper-tables/appendix-robustmodels_age.doc",
#   c(m10_test6, m10_test7),
#   custom.model.names = c("Age group, 35 split", "Age group, 25 split"),
#   caption = "",
#   omit.coef = "countrycode",
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   single.row = TRUE,
#   include.loglik = FALSE,
#   custom.gof.names = c(
#     "AIC",
#     "BIC",
#     "N",
#     "Parties",
#     "(Var) Parties (intercept)",
#     "(Var) Parties (Age <= 35)",
#     "(Cov) Intercept (Age <= 35)",
#     "(Var) Parties (Age <= 25)",
#     "(Cov) Intercept (Age <= 25)"),
#   custom.coef.names = c(
#     "Intercept",
#     "Age (<=35)",
#     "Sex (Female = 1)",
#     "Secondary education",
#     "Tertiary education",
#     "EU knowledge",
#     "Interest in politics",
#     "Campaign exposure",
#     "Political discussion",
#     "EU position",
#     "News consumption",
#     "Party of lead candidate",
#     "Manifesto salience",
#     "Twitter reach",
#     "Age (<=35): Twitter reach",
#     "Age (<=25)",
#     "Age (<=25): Twitter reach")
# )

## -----------------------------------------------------------------------------
## 3. Some additional SI material (mostly SI4) and numbers referenced in 
##    the text (i.e. correlations)
## -----------------------------------------------------------------------------

tw_vote <- ees %>% filter(avail_twitter == 1 & party_id_code != 0) %>% group_by(party_id_code) %>% 
  summarize(tw_vote = unique(twitter_vote)) %>% 
  na.omit(.)
part_cor <- left_join(party, tw_vote, by = c("ees" = "party_id_code"))
cor(part_cor[, c("spitzen_tw_logratio", "spitzen_tw_cands",
                 "spitzen_tw_tweets", "tw_vote")], method = "spearman", use = "complete")

# press release check with Jaro-Winkler search, rather than exact search
# mimics last two models from Table 3
m20_press_jw_only <- glmer(rec_count/pot_correct ~ 1 +
                          female + age_pc +
                          secondary + tertiary +
                          euk_pc + int_pc +
                          exposure + dis_pc +
                          eu_pc + new_pc +
                          p_spitz + countrycode +  
                          spitzen_press_rel_jw +
                          (1 | party_id), 
                        data = ees_sub,
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl = list(maxfun = 5e6)),
                        weights = pot_correct, family = "binomial")
m20_press_jw_all <- glmer(rec_count/pot_correct ~ 1 +
                         female + age_pc +
                         secondary + tertiary +
                         euk_pc + int_pc +
                         exposure + dis_pc +
                         eu_pc + new_pc +
                         p_spitz + 
                         countrycode +       
                         spitzen_man_01 + 
                         spitzen_tw_logratio_sd + spitzen_press_rel_jw +
                         (1 | party_id), 
                       data = ees_sub,
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl = list(maxfun = 5e6)),
                       weights = pot_correct, family = "binomial")
summary(m20_press_jw_all)
summary(m20_press_jw_only)

screenreg(c(m20_press_jw_only, m20_press_jw_all))

# htmlreg(
#   file = "../paper-tables/appendix-press_jw.doc",
#   c(m20_press_jw_only, m20_press_jw_all),
#   custom.model.names = c(
#     "Press release salience, JW (subsample)",
#     "All three, JW (subsample)"),
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   caption = "",
#   omit.coef = "countrycode",
#   center = TRUE,
#   include.loglik = FALSE,
#   single.row = TRUE,
#   custom.gof.names = c(
#     "AIC",
#     "BIC",
#     "N",
#     "Parties",
#     "(Var) Parties"),
#   custom.coef.names = c(
#     "Intercept",
#     "Sex (Female = 1)",
#     "Age",
#     "Secondary education",
#     "Tertiary education",
#     "EU knowledge",
#     "Interest in politics",
#     "Campaign exposure",
#     "Political discussion",
#     "EU position",
#     "News consumption",
#     "Party of lead candidate",
#     "Twitter reach",
#     "Manifesto salience",
#     "Press release salience (JW)")
# )

# Manual edit needed (deletion of coefficients)
# htmlreg(
#   file = "../paper-tables/appendix-table_countries.doc",
#   c(m10_baseline, m10_twitter, m10_manifesto, m10_both, m20_baseline, m20_press_only, m20_press_all),
#   custom.model.names = c(
#     "Baseline (all)",
#     "Twitter reach (all)",
#     "Manifesto mentions (all)",
#     "Both (all)",
#     "Baseline (subsample)",
#     "Press release salience (subsample)",
#     "All three (subsample)"),
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   center = TRUE,
#   include.loglik = FALSE,
#   single.row = TRUE
# )

# Separate Twitter alternatives: vote scaled measure
tw_vote_temp <- ees %>% filter(party_id_code != 0) %>% 
  group_by(party_id) %>% 
  summarize(tw_vote = unique(twitter_vote)) %>% 
  ungroup() %>% 
  mutate(tw_vote = two_sd(tw_vote))

ees_check <- left_join(ees, tw_vote_temp, by = "party_id")
ees_check$tw_vote[ees_check$party_id_code == 0] <- min(ees_check$tw_vote, na.rm = TRUE)

m10_alt <- glmer(rec_count/pot_correct ~ 1 +
                   female + age_pc +
                   secondary + tertiary +
                   euk_pc + int_pc +
                   exposure + dis_pc +
                   eu_pc + new_pc +
                   p_spitz + countrycode +
                   spitzen_man_01 + tw_vote +
                   (1 | party_id),
                 data = filter(ees_check, avail_twitter == 1 & avail_manifesto == 1),
                 control = glmerControl(optimizer = "bobyqa",
                                        optCtrl = list(maxfun = 5e6)),
                 weights = pot_correct, family = "binomial")

party_sub <- filter(party, avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1)
tw_vote_temp <- ees %>% filter(party_id_code != 0) %>% 
  group_by(party_id) %>%
  summarize(tw_vote = unique(twitter_vote))

tw_vote_temp$party_id <- as.numeric(tw_vote_temp$party_id)
party_sub <- left_join(party_sub, tw_vote_temp, by = c("ees" = "party_id"))
party_sub$tw_vote <- two_sd(party_sub$tw_vote)
party_sub <- dplyr::select(party_sub, ees, tw_vote)
ees_c <- left_join(ees, party_sub, by = c("party_id_code" = "ees"))
ees_c$tw_vote[ees_c$party_id_code == 0] <- min(ees_c$tw_vote, na.rm = TRUE)
ees_sub <- filter(ees_c, avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1)

m20_press_alt <- glmer(rec_count/pot_correct ~ 1 +
                         female + age_pc +
                         secondary + tertiary +
                         euk_pc + int_pc +
                         exposure + dis_pc +
                         eu_pc + new_pc +
                         p_spitz +
                         countrycode +
                         spitzen_man_01 +
                         tw_vote + spitzen_press_rel +
                         (1 | party_id),
                       data = ees_sub,
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl = list(maxfun = 5e6)),
                       weights = pot_correct, family = "binomial")
# htmlreg(
#   file = "../paper-tables/appendix-vote.doc",
#   c(m10_alt, m20_press_alt),
#   custom.model.names = c(
#     "Both (all)",
#     "All three (subsample)"),
#   digits = 2,
#   stars = numeric(0),
#   bold = 0.05,
#   caption = "Notes: (Restricted) Maximum Likelihood estimates of logit coefficients with standard errors in parentheses.",
#   omit.coef = "countrycode",
#   center = TRUE,
#   include.loglik = FALSE,
#   single.row = TRUE,
#   custom.gof.names = c(
#     "AIC",
#     "BIC",
#     "N",
#     "Parties",
#     "(Var) Parties"),
#   custom.coef.names = c(
#     "Intercept",
#     "Sex (Female = 1)",
#     "Age",
#     "Secondary education",
#     "Tertiary education",
#     "EU knowledge",
#     "Interest in politics",
#     "Campaign exposure",
#     "Political discussion",
#     "EU position",
#     "News consumption",
#     "Party of lead candidate",
#     "Manifesto salience",
#     "Twitter reach/votes",
#     "Press release salience")
# )

## -----------------------------------------------------------------------------
## IMPORTANT RUNTIME NOTE: THIS STEP RUNS FOR A LONGER TIME
## -----------------------------------------------------------------------------

# Jackknife, countries (both final models)
countries <- unique(ees$countrycode)
countries_press <- unique(ees_sub$countrycode)

res_full <- data.frame()
res_press <- data.frame()
ees_1 <- ees
ees_1$spitzen_twitter_sub <- NULL

for (i in countries){
  print(i)
  temp <- dplyr::filter(party, country != i)
  temp$spitzen_twitter_sub <- two_sd(temp$spitzen_tw_logratio)
  temp <- dplyr::select(temp, ees, spitzen_twitter_sub)
  ees_temp <- left_join(ees_1, temp, by = c("party_id_code" = "ees"))
  ees_temp$spitzen_twitter_sub[ees_temp$party_id_code == 0] <- min(ees_temp$spitzen_twitter_sub, na.rm = TRUE)
  temp <- filter(ees_temp, avail_twitter == 1 & avail_manifesto == 1 & countrycode != i)
  print(dim(temp))
  print(table(temp$countrycode))
  temp$spitzen_tw_logratio_sd <- temp$spitzen_twitter_sub
  temp_mod <- glmer(rec_count/pot_correct ~ 1 +
                      female + age_pc +
                      secondary + tertiary +
                      euk_pc + int_pc +
                      exposure + dis_pc +
                      eu_pc + new_pc +
                      p_spitz + countrycode +
                      spitzen_man_01 + spitzen_tw_logratio_sd +
                      (1 | party_id), 
                    data = filter(temp, avail_twitter == 1 & avail_manifesto == 1),
                    control = glmerControl(optimizer = "bobyqa",
                                           optCtrl = list(maxfun = 5e6)),
                    weights = pot_correct, family = "binomial")  
  print(tidy(temp_mod)[38:39, c(1:3)])
  res_full <- rbind(res_full, cbind(c(i, i), tidy(temp_mod)[38:39, c(1:3)]))
}
names(res_full)[1] <- "country"
pr_1 <- ggplot(filter(res_full, term == "spitzen_tw_logratio_sd"), aes(x = country, y = estimate,
                                                                       ymin = estimate - 1.96*std.error,
                                                                       ymax = estimate+ 1.96*std.error)) +
  geom_pointrange() + coord_flip() + geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "Robustness test: final models with one country dropped",
    caption  = "",
    subtitle  = "Twitter outreach")
pr_2 <- ggplot(filter(res_full, term != "spitzen_tw_logratio_sd"), aes(x = country, y = estimate,
                                                                       ymin = estimate - 1.96*std.error,
                                                                       ymax = estimate+ 1.96*std.error)) +
  geom_pointrange() + coord_flip() + geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "",
    caption  = "",
    subtitle  = "Manifesto salience")

app_jack <- grid.arrange(pr_1, pr_2, ncol = 2,
                         bottom = "Logit coefficients. Country name on y-axis reflects the country that was excluded.")

# export to file

# ggsave(app_jack, file = "../paper-figures/Appendix-jack.pdf",
#        width = 12, height = 6)
# ggsave(app_jack, file = "../paper-figures/Appendix-jack.png",
#        width = 12, height = 6, dpi = 300)

for (i in countries_press){
  print(i)
  temp <- dplyr::filter(party, country != i & avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1)
  temp$spitzen_twitter_sub <- two_sd(temp$spitzen_tw_logratio)
  temp$press_rel_sub <- two_sd(temp$spitzen_mention/temp$n_releases)
  temp <- dplyr::select(temp, ees, spitzen_twitter_sub, press_rel_sub)
  ees_temp <- left_join(ees_1, temp, by = c("party_id_code" = "ees"))
  ees_temp$spitzen_twitter_sub[ees_temp$party_id_code == 0] <- min(ees_temp$spitzen_twitter_sub, na.rm = TRUE)
  ees_temp$press_rel_sub[ees_temp$party_id_code == 0] <- min(ees_temp$press_rel_sub, na.rm = TRUE)
  temp <- filter(ees_temp, avail_twitter == 1 & avail_manifesto == 1 & avail_press == 1 & countrycode != i)
  temp$spitzen_tw_logratio_sd <- temp$spitzen_twitter_sub
  temp$spitzen_press_rel <- temp$press_rel_sub
  print(dim(temp))
  print(table(temp$countrycode))
  temp_mod <- glmer(rec_count/pot_correct ~ 1 +
                           female + age_pc +
                           secondary + tertiary +
                           euk_pc + int_pc +
                           exposure + dis_pc +
                           eu_pc + new_pc +
                           p_spitz + 
                           countrycode +       
                           spitzen_man_01 + 
                           spitzen_tw_logratio_sd + spitzen_press_rel +
                           (1 | party_id), 
                         data = temp,
                         control = glmerControl(optimizer = "bobyqa",
                                                optCtrl = list(maxfun = 5e6)),
                         weights = pot_correct, family = "binomial")
    print(tidy(temp_mod)[18:20, c(1:3)])
    res_press <- rbind(res_press, cbind(c(i, i, i), tidy(temp_mod)[18:20, c(1:3)]))
}

names(res_press)[1] <- "country"

pr_21 <- ggplot(filter(res_press, term == "spitzen_tw_logratio_sd"), aes(x = country, y = estimate,
                                                                       ymin = estimate - 1.96*std.error,
                                                                       ymax = estimate+ 1.96*std.error)) +
  geom_pointrange() + coord_flip() + geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "Robustness test: final models with one country dropped",
    caption  = "",
    subtitle  = "Twitter outreach")
pr_22 <- ggplot(filter(res_press, term == "spitzen_man_01"), aes(x = country, y = estimate,
                                                                       ymin = estimate - 1.96*std.error,
                                                                       ymax = estimate+ 1.96*std.error)) +
  geom_pointrange() + coord_flip() + geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "",
    caption  = "",
    subtitle  = "Manifesto salience")
pr_23 <- ggplot(filter(res_press, term == "spitzen_press_rel"), aes(x = country, y = estimate,
                                                                ymin = estimate - 1.96*std.error,
                                                                ymax = estimate+ 1.96*std.error)) +
  geom_pointrange() + coord_flip() + geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title    = "",
    caption  = "",
    subtitle  = "Press releases")


app_pr <- grid.arrange(pr_21, pr_22, pr_23, ncol = 3,
                       bottom = "Logit coefficients. Country name on y-axis reflects the country that was excluded.")

# export to file

# ggsave(app_pr, file = "../paper-figures/Appendix-press_jack.pdf",
#        width = 12, height = 6)
# ggsave(app_pr, file = "../paper-figures/Appendix-press_jack.png",
#        width = 12, height = 6, dpi = 300)


# Two distribution plots (SI)
p_d1 <- ggplot(filter(ees, avail_twitter == 1 & avail_manifesto == 1), 
                  aes(x = factor(rec_count/pot_correct))) +
  geom_bar() +
  scale_x_discrete(labels = c("0/3", "1/3", "2/3", "3/3")) +
  theme_minimal() +
  labs(
    x = "Recognition",
    y = "",
    title    = "Outcome distribution, voter level",
    caption  = "",
    subtitle  = "")

party_plot <- ees %>% 
  filter(avail_twitter == 1 & avail_manifesto == 1) %>% 
  group_by(countrycode, party_id) %>% 
  summarise(mean_rec = mean(rec_count/pot_correct, na.rm = TRUE),
            pname    = unique(pname_twitter))

p_d2 <- ggplot(party_plot, aes(x = reorder(party_id, -mean_rec), y = mean_rec)) +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  geom_label_repel(
    aes(x = reorder(party_id, -mean_rec), y = mean_rec, label = pname),
    data = dplyr::filter(party_plot, mean_rec > 0.55), inherit.aes = FALSE,
    size = 2, 
    color = 'white',
    fill = "#5684ce",
    box.padding = 0.5,
    point.padding = 0.75,
    segment.color = 'orange',
    alpha = 1) +
  geom_label_repel(
    aes(x = reorder(party_id, -mean_rec), y = mean_rec, label = pname),
        data = dplyr::filter(party_plot, mean_rec < 0.025), inherit.aes = FALSE,
    size = 2, 
    color = 'white',
    fill = "orange",
    box.padding = 0.5,
    point.padding = 0.75,
    segment.color = '#5684ce',
    alpha = 1) +
   labs(
    y = "Recognition",
    x = "Parties",
    title    = "Outcome distribution, party averages",
    caption  = "",
    subtitle  = "")

app_fig_dist <- grid.arrange(p_d1, p_d2, ncol = 2)

# ggsave(app_fig_dist, file = "../paper-figures/Appendix-dists.pdf",
#        width = 12, height = 6)
# ggsave(app_fig_dist, file = "../paper-figures/Appendix-dists.png",
#        width = 12, height = 6, dpi = 300)

app_fig_man <- filter(ees, avail_manifesto == 1) %>%
  group_by(party_id) %>%
  summarize(man = unique(spitzen_man)) %>%
  ggplot(aes(x = man)) +
  geom_histogram(fill = "white", colour = "grey20") +
  theme_minimal() +
  labs(
    x = "Mentions (as %)",
    y = "Count",
    title    = "Manifesto mentions, %",
    subtitle  = "(party level)")

# ggsave(app_fig_man, file = "../paper-figures/Appendix-manifesto_dist.pdf",
#        width = 6, height = 6)
# ggsave(app_fig_man, file = "../paper-figures/Appendix-manifesto_dist.png",
#        width = 6, height = 6, dpi = 300)